library(readr)
library(tidyverse)
library(scales)
library(plotly)
Full_centrality <- read_csv("Full_centrality.csv", 
    col_types = cols(X1 = col_skip(), 
                     page_id = col_character(), 
                     page_name = col_character(),
                     week = col_date(format = "%Y-%m-%d")))

page_name_map = read_csv("DATA/1000-page-info.csv",
     col_types = cols( 
                     page_id = col_character(), 
                     page_name = col_character()
                     )
     )[,1:2]


Full_centrality = Full_centrality %>% left_join(page_name_map, by='page_id') %>%relocate(page_name, .after = page_id) 

不同種類中心性

get_top_10 = function(centrality){
  Full_centrality %>% 
    filter(week == min(week)) %>% 
    arrange(desc(.data[[centrality]])) %>% 
    select(page_id) %>% head(10) %>% pull
}

plot_top_10 = function(top_10_list, centrality){
  top10.centrality.all = Full_centrality %>% filter(page_id %in% top_10_list) %>% select(page_name,.data[[centrality]], week)
  
  (top10.centrality.all %>% ggplot() + 
    geom_line(aes(y = .data[[centrality]], x = week, color=page_name)) +
    scale_x_date(labels = date_format("%Y-%m"))) %>% ggplotly
}
t10 = get_top_10('degree_centrality')
plot_top_10(t10, 'degree_centrality')

Degree Centrality

簡而言之就是總觸及率

centrality = 'degree_centrality'
top.10.deg = get_top_10(centrality)
plot_top_10(top.10.deg, centrality)

Eigenvalue Centrality

與結點互動的對象中心性越高,自己的中心性就會越高

\[ C_E^{user} = \frac{1}{\lambda} \sum_{p \in page} C_E^{page}(p) a_{ip} \\ C_E^{page} = \frac{1}{\lambda} \sum_{u \in user} C_E^{user}(u) a_{iu} \]

centrality = 'eigenvector_centrality'
top.10.eig = get_top_10(centrality)
plot_top_10(top.10.eig, centrality)
centrality = 'unweighted_eigenvector_centrality'
top.10.unw.eig = get_top_10(centrality)
plot_top_10(top.10.unw.eig, centrality)

Narmalize 後容易被 outlire 影響(10月上下,有一個用戶特別勤奮對其中一個粉專按讚,則他與那個粉專的中心性都會增加)

Closeness Centrality

到其他節點的平均距離(次數)越高,中心性越小。

centrality = 'closeness_centrality'
top.10.cls = get_top_10(centrality)
plot_top_10(top.10.cls, centrality)

互動次數在計算過程中不具意義,較不具代表性的衡量

Current Flow Betweenness Centrality

如果以其中一個粉專/用戶作為消息來源起點(source),另外一個粉專/用戶作為消息終點(sink),則關注的節點對於資訊流量的貢獻有多少?

該節點在每一對節點之間的流量總和,就是Current flow betweenness centrality。

計算上模擬電流運作 https://tinyurl.com/27dcmgj5

What happned in 2016-09-25?

#out.width="100%"}

top10.unweighted.eig.centrality.week_other = Full_centrality %>% filter(week == '2016-09-25') %>% arrange(desc(unweighted_eigenvector_centrality)) %>% select(page_name) %>% head(3) %>% pull


top10.unweighted.eig.centrality.all = Full_centrality %>% filter(page_name %in% c(top.10.unw.eig[1:3], top10.unweighted.eig.centrality.week_other)) %>% select(page_name,unweighted_eigenvector_centrality, week)


top10.unweighted.eig.centrality.all %>% ggplot() + 
  geom_line(aes(y = unweighted_eigenvector_centrality, x = week, color=page_name)) +
  scale_x_date(labels = date_format("%Y-%m-%d"))

#ggplotly(p)
#out.width="100%"}

top10.eig.centrality.week_other = Full_centrality %>% filter(week == '2016-10-02') %>% arrange(desc(eigenvector_centrality)) %>% select(page_name) %>% head(4) %>% pull


top10.eig.centrality.all = Full_centrality %>% filter(page_name %in% c(top.10.eig[1:4], top10.eig.centrality.week_other)) %>% select(page_name,unweighted_eigenvector_centrality, week)


top10.eig.centrality.all %>% ggplot() + 
  geom_line(aes(y = unweighted_eigenvector_centrality, x = week, color=page_name)) +
  scale_x_date(labels = date_format("%Y-%m-%d"))

#ggplotly(p)

社群偵測 Community Detection

將用戶粉專互動的社會網路投影到只有粉專的社會網路上,並以此進行community detection(透過 Louvain algorithm),可以大致的偵測出支持川普陣營的粉專,以及希拉蕊陣營的粉專。

以下將各個陣營的粉專的各種中心性取平均,畫出隨時間的變化圖

library(readr)
avg_centrality_panel <- read_csv("avg_centrality_panel.csv", 
    col_types = cols(week = col_date(format = "%Y-%m-%d")))
avg_centrality_panel$community = factor(
                      avg_centrality_panel$community,
                      levels = c('Trump', 'Clinton','others'))

plot_avg_centrality = function(col_name, title_name){
  avg_centrality_panel %>% ggplot() +
  geom_line(aes(x = week, y = .data[[col_name]], color = community)) +
  scale_x_date(labels = date_format("%Y-%m-%d")) + 
  ggtitle(paste(title_name, "Change")) +
  ylab(title_name)
}

plot_avg_centrality('degree_centrality', 
                    'Degree Centrality')

plot_avg_centrality('eigenvector_centrality', 
                    'Eigenvector Centrality')

plot_avg_centrality('unweighted_eigenvector_centrality', 
                    'Unweighted Eigenvector Centrality')

plot_avg_centrality('closeness_centrality', 
                    'Closeness Centrality')

移除政治人物

Full_centrality <- read_csv("Result/without_politicians/Full_centrality.csv", 
    col_types = cols(X1 = col_skip(), 
                     page_id = col_character(), 
                     page_name = col_character(),
                     week = col_date(format = "%Y-%m-%d")))
## Warning: Missing column names filled in: 'X1' [1]
## Warning: The following named parsers don't match the column names: page_name
page_name_map = read_csv("DATA/1000-page-info.csv",
     col_types = cols( 
                     page_id = col_character(), 
                     page_name = col_character()
                     )
     )[,1:2]

Full_centrality = Full_centrality %>% left_join(page_name_map, by='page_id') %>%relocate(page_name, .after = page_id) 

Degree Centrality

centrality = 'degree_centrality'
top.10.deg = get_top_10(centrality)
plot_top_10(top.10.deg, centrality)

Eigenvalue Centrality

centrality = 'eigenvector_centrality'
top.10.eig = get_top_10(centrality)
plot_top_10(top.10.eig, centrality)
centrality = 'unweighted_eigenvector_centrality'
top.10.unw.eig = get_top_10(centrality)
plot_top_10(top.10.unw.eig, centrality)

Closeness Centrality

centrality = 'closeness_centrality'
top.10.cls = get_top_10(centrality)
plot_top_10(top.10.cls, centrality)

回歸

將四種不同中心性進行panel data regression \[ \text{centrality}_{it} = \alpha + \text{fake_count_cum}_{it} \times \beta + \text{is_debate}_{it} \times \gamma + u_i \] 其中 $t 紀錄了總統大選辯論會的時間,觀察到辯論會時eigenvector centrality有大幅下降,需要把這個因子納入考量,以研究假新聞對中心性的影響。 而{it} 則紀錄了過去累計發布假新聞數量。我假設為線性增加,但centrality實際上會被Normalized,所以可以考慮取對數。

我根據粉專類別(type) 進行迴歸。

  1. figure 例如 Trump 等政治人物或記者

  2. group 例如 Occupy Democrats 等政府/非政府組織粉專

  3. media 例如 CNN 等媒體。底下又有細分電視、網路、報紙等

  4. others 演員、喜劇、歌手等

Degree Centrality


(1) (2) (3) (4) (5) (6) (7) (8)
figure group media others figure group media others

fake_posts_cum 0.000522*** 0.00000263 0.0000325 -0.0000186 0.000514*** 0.00000246 0.0000321 -0.0000186
(4.33) (0.33) (1.95) (-0.56) (4.27) (0.31) (1.93) (-0.56)
 
is_debate 0.000914** 0.000109 0.000148 -0.000118
(2.89) (1.02) (1.71) (-0.56)
 
_cons 0.00524*** 0.00269*** 0.00388*** 0.00300*** 0.00517*** 0.00268*** 0.00387*** 0.00301***
(6.57) (6.38) (13.64) (3.87) (6.48) (6.35) (13.59) (3.85)

N 1815 2933 7804 567 1815 2933 7804 567

t statistics in parentheses
* p < 0.05, ** p < 0.01, *** p < 0.001

Eigenvector Centrality


(1) (2) (3) (4) (5) (6) (7) (8)
figure group media others figure group media others

fake_posts_cum -0.000371 -0.0000934 -0.000325* 0.0000110 -0.000307 -0.0000970 -0.000319* 0.0000112
(-0.48) (-0.72) (-2.46) (0.17) (-0.40) (-0.75) (-2.42) (0.18)
 
is_debate -0.00785*** 0.00293 -0.00293*** -0.000619
(-3.77) (1.46) (-4.16) (-1.48)
 
_cons 0.0119*** 0.00524*** 0.00636*** 0.00168** 0.0124*** 0.00504*** 0.00656*** 0.00173**
(3.85) (4.51) (5.75) (3.11) (4.01) (4.29) (5.93) (3.17)

N 1815 2933 7804 567 1815 2933 7804 567

t statistics in parentheses
* p < 0.05, ** p < 0.01, *** p < 0.001

Unweighted-Eigenvector Centrality


(1) (2) (3) (4) (5) (6) (7) (8)
figure group media others figure group media others

fake_posts_cum 0.00131* -0.000126* -0.000178* -0.0000498 0.00131* -0.000128** -0.000180* -0.0000498
(2.31) (-2.53) (-2.31) (-0.47) (2.31) (-2.59) (-2.35) (-0.47)
 
is_debate -0.000296 0.00173* 0.00112** 0.000301
(-0.20) (2.57) (2.79) (0.44)
 
_cons 0.0210*** 0.00967*** 0.0124*** 0.00582*** 0.0210*** 0.00955*** 0.0123*** 0.00580***
(6.01) (8.27) (10.16) (4.08) (5.99) (8.14) (10.08) (4.04)

N 1815 2933 7804 567 1815 2933 7804 567

t statistics in parentheses
* p < 0.05, ** p < 0.01, *** p < 0.001

Closeness Centrality


(1) (2) (3) (4) (5) (6) (7) (8)
figure group media others figure group media others

fake_posts_cum 0.00163*** 0.000318*** 0.000204 0.000520 0.00162*** 0.000317*** 0.000201 0.000519
(4.10) (4.87) (1.79) (0.69) (4.07) (4.84) (1.75) (0.69)
 
is_debate 0.00117 0.000550 0.00154* -0.00628
(1.11) (0.62) (2.55) (-1.13)
 
_cons 0.301*** 0.297*** 0.310*** 0.305*** 0.301*** 0.297*** 0.310*** 0.305***
(126.17) (184.50) (291.33) (87.45) (125.99) (186.44) (291.18) (86.77)

N 1815 2933 7804 567 1815 2933 7804 567

t statistics in parentheses
* p < 0.05, ** p < 0.01, *** p < 0.001